packages <- c('ggbump','tidyverse','laeken','MetBrewer','sf','ggiraph')
install.packages(setdiff(packages, rownames(installed.packages())))
rm(packages)
library(ggbump)
library(tidyverse)
library(laeken)
library(MetBrewer)
library(sf)
library(ggiraph)
# import pdata
pdata <- readRDS('SHIWpdata.rds')
# Inequality indexes -----------------------------------------------------------
## Gini - Income ----
### Over regions by year
as_tibble(gini(pdata$eqhincome,
weights = pdata$peso,
years = pdata$anno,
breakdown = pdata$ireg,
na.rm = T
)[['valueByStratum']]) -> giniInc
giniInc$stratum <- gsub(' - ', '-', giniInc$stratum)
### ranking
giniInc |>
group_by(year) |>
mutate(
rank = rank(value)
) -> giniInc
### rounding off value
giniInc$value <- round(giniInc$value, 2)
### ranking5
giniInc$rank5 <- case_match(giniInc$rank,
1:4 ~ 1,
5:8 ~ 2,
9:12 ~ 3,
13:16 ~ 4,
17:20 ~ 5
)
### Total by year
as_tibble_col(gini(pdata$eqhincome,
weights = pdata$peso,
years = pdata$anno,
na.rm = T
)[["value"]]) -> giniIncTot
giniIncTot$stratum <- 'Italia'
giniIncTot$year <- c(2000, 2002, 2004, 2008, 2010, 2012, 2014, 2016, 2020)
giniIncTot$rank <- NA
giniInc <- rbind(giniInc, giniIncTot)
rm(giniIncTot)
## Gini - wealth ----
### Over regions by year
as_tibble(gini(pdata$pcwealth,
weights = pdata$peso,
years = pdata$anno,
breakdown = pdata$ireg,
na.rm = T
)[['valueByStratum']]) -> giniW
giniW$stratum <- gsub(' - ', '-', giniW$stratum)
### ranking
giniW |>
group_by(year) |>
mutate(
rank = rank(value)
) -> giniW
### ranking5
giniW$rank5 <- case_match(giniW$rank,
1:4 ~ 1,
5:8 ~ 2,
9:12 ~ 3,
13:16 ~ 4,
17:20 ~ 5
)
### rounding off value
giniW$value <- round(giniW$value, 2)
### Total by year
as_tibble_col(gini(pdata$pcwealth,
weights = pdata$peso,
years = pdata$anno,
na.rm = T
)[["value"]]) -> giniWTot
giniWTot$stratum <- 'Italia'
giniWTot$year <- c(2000, 2002, 2004, 2008, 2010, 2012, 2014, 2016, 2020)
giniWTot$rank <- NA
giniW <- rbind(giniW, giniWTot)
rm(giniWTot)
# Poverty ---------------------------------------------------------------------
## Head count ----
### Over region by year
pdata |>
group_by(anno, ireg) |>
count(pov) -> pov
pdata |>
group_by(anno, ireg) |>
count(!pov) -> pov$'!pov'
pov |>
mutate('!pov' = `!pov`$n) |>
filter(pov == 1) |>
mutate(pov = n) |>
select(!n) -> pov
pov |>
mutate(hCount = pov/(sum(pov, `!pov`))) -> pov
### Ranking
pov |>
group_by(anno) |>
mutate(
rankHCount = 21-rank(hCount)
) -> pov
## Poverty intensity ----
### povLine
pdata |>
group_by(anno) |>
summarise(povLine = weightedMedian(eqhincome,
weights = peso)*0.6) -> povLine
pov <- left_join(pov, povLine)
rm(povLine)
### Poverty gap
avPoor <- pdata |>
filter(pov == 1) |>
group_by(anno, ireg) |>
summarise(avPoor = weightedMean(eqhincome, weights = peso))
pov <- left_join(pov, avPoor)
rm(avPoor)
pov$povGapIndex <- pov$hCount * (pov$povLine - pov$avPoor)/pov$povLine
### Ranking
pov |>
group_by(anno) |>
mutate(
rankPovGap = 21-rank(povGapIndex)
) -> pov
## LPM risk of being in poverty ----
pdata <- within(pdata, cfedu <- relevel(factor(cfedu), ref = 'Specializzazione post-laurea'))
pdata <- within(pdata, cfsex <- relevel(factor(cfsex), ref = 'Maschile'))
pdataReg <- pdata |>
filter(anno == 2020)
### Regression models
lpm0 <- lm(pov ~ factor(cfedu),
weights = pesopop,
data = pdataReg)
lpm1 <- lm(pov ~ factor(cfedu) + factor(cfsex) + factor(cfclass),
weights = pesopop,
data = pdataReg)
### Harvesting results
lpm0results <- as_tibble(summary(lpm0)[["coefficients"]])
lpm0results <- cbind(lpm0results, confint(lpm0, level=0.95)) #adding CIs
lpm1results <- as_tibble(summary(lpm1)[["coefficients"]])
lpm1results <- cbind(lpm1results, confint(lpm1, level=0.95)) #adding CIs
lpm0results <- lpm0results %>% setNames(paste0('0.', names(.)))
### Cleaning results
lpm0results <- lpm0results |>
rownames_to_column(var = 'reg')
lpm1results <- lpm1results |>
rownames_to_column(var = 'reg')
lpmresults <- full_join(lpm0results, lpm1results)
rm(lpm0results, lpm1results, lpm0, lpm1)
lpmresults['reg'][lpmresults['reg'] == '(Intercept)'] <- ')Intercept'
lpmresults <- lpmresults |>
mutate(reg = str_split_fixed(reg, fixed(')'), n = Inf))
lpmresults <- within(lpmresults, reg <- reg[,2])
lpmresults$'0.star' <- ifelse(lpmresults$`0.Pr(>|t|)` <= 0.001, '***',
ifelse(lpmresults$`0.Pr(>|t|)` <= 0.01, '**',
ifelse(lpmresults$`0.Pr(>|t|)` <= 0.05, '*',
ifelse(lpmresults$`0.Pr(>|t|)` <= 0.1, '.', ''))))
lpmresults$star<- ifelse(lpmresults$`Pr(>|t|)` <= 0.001, '***',
ifelse(lpmresults$`Pr(>|t|)` <= 0.01, '**',
ifelse(lpmresults$`Pr(>|t|)` <= 0.05, '*',
ifelse(lpmresults$`Pr(>|t|)` <= 0.1, '.', ''))))
lpmresults <- lpmresults |>
relocate('0.star', .before = Estimate)
lpmresults$vars <- c('Intercetta', 'Titolo di studio', 'Titolo di studio', 'Titolo di studio', 'Titolo di studio', 'Titolo di studio', 'Sesso', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale')